library(ggplot2)
library(patchwork)
library(ggpubr)
g2500_s5000_c0p001 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g2500_s5000_c0.001_assessed.csv" , row.names=1)
g2500_s5000_c0p01 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g2500_s5000_c0.01_assessed.csv", row.names=1)
g2500_s5000_c0p1 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g2500_s5000_c0.1_assessed.csv", row.names=1)
g2500_s5000_c0p223 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g2500_s5000_c0.223_assessed.csv", row.names=1)
g2500_s5000_c0p693 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g2500_s5000_c0.693_assessed.csv", row.names=1)
g1000_s30000_c0p01 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g1000_s30000_c0.01_assessed.csv", row.names=1)
g50_s100000_c0p511 <- read.csv("~/mccoyLab_withOthers/transmission-distortion/assess_varying_windows/g50_s100000_c0.511_assessed.csv", row.names=1)
load("~/mccoyLab_withOthers/transmission-distortion/test_data_rhapsodi_gen/true_nsnp_gen_model_noDNM.Rdata")
get_true_nsnp <- function(true_nsnp_dict, num_snps, num_gams, coverage, seqerr, avg_recomb, rsd){
true_nsnp <- true_nsnp_dict[[format(num_snps, scientific=FALSE, trim=TRUE)]][[as.character(num_gams)]][[as.character(coverage)]][[as.character(seqerr)]][[as.character(avg_recomb)]][[as.character(rsd)]]
return(data.frame(true_nsnp = true_nsnp))
}
get_num_windows <- function(true_nsnp, window_size, overlap_denom){
true_nsnp <- as.integer(true_nsnp)
window_size <- as.integer(window_size)
overlap_denom <- as.numeric(overlap_denom)
vector_of_positions <- 1:true_nsnp
overlap <- window_size %/% overlap_denom #Degree of overlap between segments
starts = seq(1, length(vector_of_positions), by = window_size - overlap)
ends = starts + window_size - 1
ends[ends > length(vector_of_positions)] = length(vector_of_positions)
windows <- lapply(1:length(starts), function(i) vector_of_positions[starts[i]:ends[i]])
#merge the last two windows to avoid edge effect
if (length(windows) > 1){
combined <- unique(c(windows[[length(windows) - 1]], windows[[length(windows)]]))
combined <- combined[order(combined)]
total_combined <- windows[-c((length(windows) - 1), length(windows))]
total_combined[[length(total_combined) + 1]] <- combined
windows <- total_combined
}
return(data.frame(num_windows = length(windows)))
}
get_centimorgan_per_window <- function(avg_recomb, num_windows){
avg_recomb <- as.numeric(avg_recomb)
num_windows <- as.integer(num_windows)
centimorgan_per_window <- avg_recomb * 100 / num_windows
return(data.frame(cmpw = centimorgan_per_window))
}
run_process <- function(true_nsnp_dict, dfoi, title_val){
#add true nsnp
dfoi$true_nsnp <- do.call(rbind, lapply(1:nrow(dfoi),
function(x) get_true_nsnp(true_nsnp_dict,
dfoi$nsnp[x],
dfoi$ngam[x],
dfoi$cov[x],
dfoi$se[x],
dfoi$avgr[x],
dfoi$rsd[x])))
#add num windows
dfoi$num_windows <- do.call(rbind, lapply(1:nrow(dfoi),
function(x) get_num_windows(dfoi$true_nsnp[[1]][x],
dfoi$window_length[x],
dfoi$overlap_denom[x])))
#add centimorgans per window
dfoi$cmpw <- do.call(rbind, lapply(1:nrow(dfoi),
function(x) get_centimorgan_per_window(dfoi$avgr[x], dfoi$num_windows[[1]][x])))
g1 <- ggplot(dfoi, aes(x=log(cmpw[[1]]), y=phasing_acc, col=as.factor(window_length))) +
facet_wrap(avgr~se, scales="free") +
geom_point() +
theme_bw() +
theme(panel.grid = element_blank(), panel.background=element_blank(), legend.position = 'none') +
ylab("Phasing Accuracy") + xlab('ln(centimorgans/window)') +
ggtitle(paste0(title_val, "ACC"))
g2 <- ggplot(dfoi, aes(x=log(cmpw[[1]]), y=phasing_ser, col=as.factor(window_length))) +
facet_wrap(avgr~se, scales="free") +
geom_point() +
theme_bw() +
theme(panel.grid = element_blank(), panel.background=element_blank(), legend.position = 'none') +
ylab("Phasing SER") + xlab('ln(centimorgans/window)') +
ggtitle(paste0(title_val, "SER"))
g3 <- get_legend(ggplot(dfoi,aes(x=log(cmpw[[1]]),y=phasing_ser,col=as.factor(window_length))) +
geom_point() +
facet_wrap(avgr~se, scales="free") +
theme_bw() +
theme(panel.grid = element_blank(), panel.background=element_blank()) +
ggtitle(paste0(title_val, "legend")))
g3 <- as_ggplot(g3)
return(list(g1=g1, g2=g2, g3=g3, dfoi=dfoi))
}
g2500_s5000_c0p001_out <- run_process(true_nsnp_dict, g2500_s5000_c0p001, "g2500_s5000_c0.001")
g2500_s5000_c0p001_out$g1

g2500_s5000_c0p001_out$g2

g2500_s5000_c0p001_out$g3

g2500_s5000_c0p01_out <- run_process(true_nsnp_dict, g2500_s5000_c0p01, "g2500_s5000_c0.01")
g2500_s5000_c0p01_out$g1

g2500_s5000_c0p01_out$g2

g2500_s5000_c0p01_out$g3

g2500_s5000_c0p1_out <- run_process(true_nsnp_dict, g2500_s5000_c0p1, "g2500_s5000_c0.1")
g2500_s5000_c0p1_out$g1

g2500_s5000_c0p1_out$g2

g2500_s5000_c0p1_out$g3

g2500_s5000_c0p223_out <- run_process(true_nsnp_dict, g2500_s5000_c0p223, "g2500_s5000_c0.223")
g2500_s5000_c0p223_out$g1

g2500_s5000_c0p223_out$g2

g2500_s5000_c0p223_out$g3

g2500_s5000_c0p693_out <- run_process(true_nsnp_dict, g2500_s5000_c0p693, "g2500_s5000_c0.693")
g2500_s5000_c0p693_out$g1

g2500_s5000_c0p693_out$g2

g2500_s5000_c0p693_out$g3

g1000_s30000_c0p01_out <- run_process(true_nsnp_dict, g1000_s30000_c0p01, "g1000_s30000_c0.01")
g1000_s30000_c0p01_out$g1

g1000_s30000_c0p01_out$g2

g1000_s30000_c0p01_out$g3

g50_s100000_c0p511_out <- run_process(true_nsnp_dict, g50_s100000_c0p511, "g50_s100000_c0.511")
g50_s100000_c0p511_out$g1

g50_s100000_c0p511_out$g2

g50_s100000_c0p511_out$g3
